
################################################################# 
# Vladimir Chlouba, Jan Pierskalla & Erik Wibbels
# vchlouba@nd.edu
# December 2022
#################################################################
# Historical Exposure to Statehood, Ethnic Exclusion, and 
# Compliance With the State
# Plots Replication R Script
################################################################

# loading the necessary R packages
library(ggplot2)
library(foreign)
library(data.table)
library(stargazer)
library(lme4)
library(arm)
library(memisc)
library(vegan)
library(MASS)
library(nnet)
library(spikeSlabGAM)
library(speedglm)
library(biglm)
library(ordinal)
library(erer)
library(mnlogit)
library(mlogit)
library(scales)
library(nnet)
library(haven)
library(lfe)
library(GISTools)
library(rgdal)
library(sp)
library(raster)
library(maptools)
library(rgeos)
library(lattice)
library(spdep)
library(cshapes)
library(countrycode)
library(margins)
library(ggmap)
library(dplyr)
library(lmtest)
library(sandwich)
library(effects)
library(gridExtra)

rm(list = ls())
options(stringsAsFactors = FALSE)
#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

# loading the dataset (this has to be changed based on where replication dataset is located)
afro.master <- read.csv("AB_data_replication.csv")

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~


colnames(afro.master)[colnames(afro.master) == "status"] ="exclusion"

# creating lists of variables
control_list_grid_pre <- "distw05_new+wateraccess+soil_quality+abslat+mnt2000+avgprec+prec2+avgtemp+malaria+share+exclusion+exclusion*distw05_new"
control_list_grid_post <- "+capdist+bdist1+conflict+lpop+loglightsavg+oil_dum"
control_list_indiv_pre <- "+female+age+age2"
control_list_indiv_post <- "+female+age+age2+education+employed+urban"

dv_list <- c("gov_tax", "gov_courts","gov_law")

afro.master$fcountry <- as.factor(afro.master$country)

afro.master$fethnic_id <- as.factor(afro.master$ethnic_id)


######################################################################
# Figure 2: Conditional Quasi-Voluntary Compliance (Marginal Effect
# Plots)
######################################################################

fmla1a <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("+ fcountry")))

fmla1b <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("+ fcountry")))

fmla1c <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("+ fcountry + fethnic_id")))

fmla1d <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("+ fcountry + fethnic_id")))

fmla2a <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("+ fcountry")))

fmla2b <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("+ fcountry")))

fmla2c <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("+ fcountry + fethnic_id")))

fmla2d <- as.formula(paste(paste(dv_list[2],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("+ fcountry + fethnic_id")))

fmla3a <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("+ fcountry")))

fmla3b <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("+ fcountry")))

fmla3c <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_indiv_pre),
                           paste("+ fcountry + fethnic_id")))

fmla3d <- as.formula(paste(paste(dv_list[3],"~"),
                           paste(control_list_grid_pre),
                           paste(control_list_grid_post),
                           paste(control_list_indiv_post),
                           paste("+ fcountry + fethnic_id")))


# estimate coefficients
m1a <- lm(fmla1a, data=afro.master)
m1b <- lm(fmla1b, data=afro.master)
m1c <- lm(fmla1c, data=afro.master)
m1d <- lm(fmla1d, data=afro.master)
m2a <- lm(fmla2a, data=afro.master)
m2b <- lm(fmla2b, data=afro.master)
m2c <- lm(fmla2c, data=afro.master)
m2d <- lm(fmla2d, data=afro.master)
m3a <- lm(fmla3a, data=afro.master)
m3b <- lm(fmla3b, data=afro.master)
m3c <- lm(fmla3c, data=afro.master)
m3d <- lm(fmla3d, data=afro.master)


# Huber-White heteroskedasticity-robust standard error calculation and table 
# generation code for lm and glm models in R. Written by Joshua Gubler ~  http://joshuagubler.com.
robustse.f <- function(model, cluster, df_correction) {
  
  require(sandwich)
  require(lmtest)
  require(multiwayvcov)
  if(missing(cluster)) {
    name <- deparse(substitute(model))
    modelname <- paste(name,"rob",sep=".")
    model$se <- coeftest(model, vcov=vcovHC(model,"HC1"))[,2]
    model$vcovHC <- vcovHC(model,"HC1")
    assign(modelname,model,envir = .GlobalEnv)
    coeftest(model, vcov=vcovHC(model,"HC1"))
  } else {
    name <- deparse(substitute(model))
    modelname <- paste(name,"rob",sep=".")
    vcovCL <- cluster.vcov(model, cluster, df_correction = df_correction)
    model$vcovCL <- vcovCL
    modelname <- paste(name,"clustrob",sep=".")
    model$se <- coeftest(model, vcovCL)[,2]
    assign(modelname,model,envir = .GlobalEnv)
    coeftest(model, vcovCL)
  }
}


robustse.f(m1a, cluster = afro.master$gid, df_correction = F)

# effect plots for each model
first <- predictorEffect('distw05_new', m1a.clustrob, xlevels=list(exclusion=c(1, 2, 3, 4)))


one <- plot(first, lines=list(multiline=TRUE, lwd = 2, lty = 1:3), 
            confint=list(style="auto"))
one <- update(one,scales=list(cex=1.25), xlab = list('Historical exposure to statehood (LHCE)', cex=1.5),
              ylab = list('Predicted value of "pay taxes"', cex=1.5), main = list(label = " ", cex = 1.5))



robustse.f(m2a, cluster = afro.master$gid, df_correction = F)

second <- predictorEffect('distw05_new', m2a.clustrob, xlevels=list(exclusion=c(1, 2, 3, 4)))


two <- plot(second, lines=list(multiline=TRUE, lwd = 2, lty = 1:3), 
            confint=list(style="auto"))
two <- update(two,scales=list(cex=1.25), xlab = list('Historical exposure to statehood (LHCE)', cex=1.5),
              ylab = list('Predicted value of "obey courts"', cex=1.5), main = list(label = " ", cex = 1.5))


robustse.f(m3a, cluster = afro.master$gid, df_correction = F)


third <- predictorEffect('distw05_new', m3a.clustrob, xlevels=list(exclusion=c(1, 2, 3, 4)))


three <- plot(third, lines=list(multiline=TRUE, lwd = 2, lty = 1:3), 
            confint=list(style="auto"))
three <- update(three,scales=list(cex=1.25), xlab = list('Historical exposure to statehood (LHCE)', cex=1.5),
              ylab = list('Predicted value of "abide by the law"', cex=1.5), main = list(label = " ", cex = 1.5))

grid.arrange(one, two, three, ncol=3)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

#######################################################################
# Figure 4: Difference-In-Differences Estimates for Individual Vaccines
#######################################################################

# loading the DHS dataset (this has to be changed based on where replication dataset is located)

# note that DHS data has to be downloaded after requesting permission from the DHS Program directly
# https://dhsprogram.com/data/

DHS <- read.csv('DHS_data_replication.csv')

DHS.data <- subset(DHS, DHS$year == 2006 | DHS$year == 2011)

# BCG
BCG <- felm(BCG ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
              religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(BCG)

# measles
measles <- felm(measles ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
                  religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(measles)

# DPT1
DPT1 <- felm(DPT1 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
               religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(DPT1)

# DPT2
DPT2 <- felm(DPT2 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
               religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(DPT2)

# DPT3
DPT3 <- felm(DPT3 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
               religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(DPT3)

# polio 0
polio0 <- felm(polio0 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
                 religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(polio0)

# polio 1
polio1 <- felm(polio1 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
                 religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(polio1)

# polio 2
polio2 <- felm(polio2 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
                 religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(polio2)

# polio 3
polio3 <- felm(polio3 ~ Buganda*period + urban + delivery_place + education + radio + wealth_index +
                 religion + occupation +  age_mother | 0 | 0 | CASEID, DHS.data)
summary(polio3)


# visualization via a coefficient plot
term <- c('BCG', 'measles', 'DPT1', 'DPT2', 'DPT3', 'polio0', 'polio1', 'polio2', 'polio3')

estimate <- c(BCG$coefficients[49], measles$coefficients[49], DPT1$coefficients[49],
              DPT2$coefficients[49], DPT3$coefficients[49], polio0$coefficients[49],
              polio1$coefficients[49], polio2$coefficients[49], polio3$coefficients[49])

std.error <- c(BCG$cse[49], measles$cse[49], DPT1$cse[49],
               DPT2$cse[49], DPT3$cse[49], polio0$cse[49],
               polio1$cse[49], polio2$cse[49], polio3$cse[49])

model <- c('Model 1', 'Model 2', 'Model 3', 'Model 4', 'Model 5', 'Model 6', 'Model 7', 'Model 8', 'Model9')

data <- as.data.frame(cbind(term, estimate, std.error, model))

data$term <- as.character(data$term)
data$estimate <- as.numeric(data$estimate)
data$std.error <- as.numeric(std.error)


dwplot(data, dot_args = list(size = 4, pch = 19), whisker_args = list(lwd = 0.75, colour = 'black')) +
  theme_classic(base_size = 17 ) +
  geom_vline(xintercept = 0, colour = "red", linetype = 3) + theme(legend.position='none')


#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Figure 5: Full Vaccination Rates in Buganda and Non-Buganda Areas
# Between 1995 and 2016
######################################################################

# loading the DHS dataset (this has to be changed based on where replication dataset is located)

# note that DHS data has to be downloaded after requesting permission from the DHS Program directly
# https://dhsprogram.com/data/

DHS <- read.csv('DHS_data_replication.csv')


DHS$year <- as.factor(DHS$year)

dd_sample_stand <- DHS %>% 
  group_by(year, Buganda) %>% 
  mutate(mean_vacc = mean(vaccination, na.rm  =T)) %>% 
  mutate(se = 1.96*sqrt(mean_vacc*(1-mean_vacc)/length(mean_vacc))) %>% 
  ungroup() %>% 
  mutate(year = forcats::fct_relevel(year, c("1995", "2000", "2006", "2011", "2016"))) 


vaccination_plot <- ggplot(dd_sample_stand %>% 
                             ungroup() %>%  
                             filter(!is.na(Buganda)), 
                           aes(x = year, 
                               y = vaccination, 
                               fill = factor(Buganda), 
                               color = factor(Buganda))) + 
  
  geom_errorbar(aes(ymin=mean_vacc-se, ymax=mean_vacc+se), width=.1) + 
  
  geom_line(data = dd_sample_stand %>% 
              group_by(year, Buganda) %>% 
              filter(row_number() == min(row_number())) %>% 
              filter(!is.na(Buganda)), 
            aes(group = factor(Buganda), 
                y = mean_vacc),
            position = position_jitterdodge(jitter.width = 0, 
                                            dodge.width = 0)) +
  geom_point(data = dd_sample_stand %>% 
               group_by(year, Buganda) %>% 
               filter(row_number() == min(row_number())) %>% 
               filter(!is.na(Buganda)), 
             aes(x = year, 
                 fill = factor(Buganda),
                 y = mean_vacc), 
             color= "black",
             position = position_jitterdodge(jitter.width = 0, 
                                             dodge.width = 0),
             size = 1, pch = c(21, 22, 21, 22, 21, 22, 22, 21, 22, 21), inherit.aes = T) +
  
  ylim(0, 0.4) + 
  
  theme_bw(base_size = 11) +
  guides(fill=FALSE) +
  theme(legend.position="none") +
  
  labs(y = "Proportion Fully Vaccinated", x = "DHS Survey Period", 
       caption = "Blue circles = Buganda; red squares = the rest of Uganda.")

vaccination_plot

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~
